# -*- coding: utf-8 -*-
import re
import pymysql.cursors

# Title: any text followed by a parenthesised group (full- or half-width
# brackets) holding optional digits, a hyphen, then digits / '+' / '=' / '，'.
# The pattern has two capture groups, so findall() yields tuples; group 1 is
# the whole title line.
pattern1 = re.compile(r'(.*[（\(](\d)*-[\d+=，]*[）\)])')
# Author names: 2-10 CJK characters or whitespace, immediately followed by a
# digit and ')' (presumably an affiliation index — confirm against the input),
# then the rest of the line.
pattern2 = re.compile(r'([\u4e00-\u9fa5\s]{2,10}\d\).*)')
# Author info: a digit, ')', optional CJK characters, and later on the same
# line a run of six digits (presumably a postal code — confirm).
# Two capture groups, so findall() yields tuples; group 1 is the whole line.
pattern3 = re.compile(r'((\d\)[\u4e00-\u9fa5]*).*\d{6}.*)')
# Abstract: shortest span, possibly across lines, from '摘要' up to and
# including the next '关键词'.
pattern4 = re.compile(r'摘要[\s\S]*?关键词')
# Keywords: '关键词' and everything after it up to the end of that line.
pattern5 = re.compile(r'关键词.*')


# Load the whole input file into memory; every pattern is run over the full
# text rather than line by line.
with open('temp.txt', mode='r', encoding='utf-8') as f:
    data = f.read()

# Run each pattern over the text, in order: titles, author names, author
# info, abstracts, keywords.  result1 and result3 hold tuples (their patterns
# have two groups); the others hold plain strings.
result1, result2, result3, result4, result5 = (
    regex.findall(data)
    for regex in (pattern1, pattern2, pattern3, pattern4, pattern5)
)

# --- Database operations ---
# Settings for the MySQL connection.  Host, user, password and database name
# are empty strings in this file (presumably placeholders to be filled in
# before running — confirm).
connect_kwargs = {
    'host': '',
    'user': '',
    'password': '',
    'db': '',
    'charset': 'utf8',
}
connection = pymysql.connect(**connect_kwargs)

# Publication year stored with every record built from this input file.
YEAR = '2018'


def _strip_label(text, prefix='', suffix=''):
    """Return ``text`` without an exact leading/trailing label.

    ``str.strip(chars)`` treats its argument as a *set of characters*, so
    ``'关键词：关系模型'.strip('关键词：')`` also eats the leading ``关`` of the
    real content.  This helper removes ``prefix`` and ``suffix`` only as whole
    strings, then drops the colon (full- or half-width) that follows the
    label and any surrounding whitespace.

    Args:
        text: The raw regex match.
        prefix: Label to remove from the start, if present.
        suffix: Label to remove from the end, if present.

    Returns:
        The cleaned text.
    """
    if prefix and text.startswith(prefix):
        text = text[len(prefix):]
    if suffix and text.endswith(suffix):
        text = text[:-len(suffix)]
    return text.strip().lstrip('：:').strip()


record_count = len(result1)

# Every title needs an author, author-info, abstract and keyword match at the
# same index.  Fail with a clear message instead of an IndexError halfway
# through, and release the already-open connection before raising.
for field_name, matches in (('author', result2), ('author info', result3),
                            ('abstract', result4), ('keywords', result5)):
    if len(matches) < record_count:
        connection.close()
        raise ValueError(
            'found %d titles but only %d %s matches in temp.txt'
            % (record_count, len(matches), field_name))

info_list = []
for i in range(record_count):
    # The cleaned text is written back into result4/result5 so those lists
    # hold the same values that are stored in the database.
    result5[i] = _strip_label(result5[i], prefix='关键词')
    # The abstract match runs from '摘要' through the following '关键词'.
    # Proceedings from 2005 and 2009-2018 include an abstract.
    result4[i] = _strip_label(result4[i], prefix='摘要', suffix='关键词')
    info_list.append((YEAR, result1[i][0], result2[i], result3[i][0],
                      result4[i], result5[i]))
    # 2006-2008 have no abstract: skip the abstract handling (and its length
    # check above) and build a five-column row instead:
    # info_list.append((YEAR, result1[i][0], result2[i], result3[i][0], result5[i]))

# Parameterised INSERT for records that include an abstract (six columns).
# For years without an abstract, drop the `zhaiyao` column and one
# placeholder:
#   insert into `lunwen` (`years`,`title`,`author`,`author_info`,`guanjiang`) values (%s,%s,%s,%s,%s)
# A commented-out variant of the six-column statement targeted a `temp` table.
sql = (
    "insert into `lunwen` "
    "(`years`,`title`,`author`,`author_info`,`zhaiyao`,`guanjiang`) "
    "values (%s,%s,%s,%s,%s,%s)"
)

try:
    # Obtain a cursor, send all rows in one executemany() call, then commit.
    with connection.cursor() as cursor:
        cursor.executemany(sql, info_list)
        connection.commit()
finally:
    # Close the connection whether or not the insert succeeded.
    connection.close()